import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
class ColumnsData:
    """Column names of the COVID-19 data frame, collected in one place.

    Each attribute holds the exact header string of one column, so the rest
    of the script can refer to columns symbolically instead of repeating the
    literals.
    """

    # Identification columns.
    date = 'Date'
    province = 'Province'
    island = 'Island'

    # Cumulative epidemiological counters.
    cases = 'Total Cases'
    deaths = 'Total Deaths'
    recovered = 'Total Recovered'
    actives_cases = 'Total Active Cases'

    # Demographic / geographic attributes.
    population = 'Population'
    area = 'Area (km2)'
    density = 'Population Density'

    # Derived column added by this script (deaths divided by cases).
    mortality = 'Mortality'
def create_bin(df, columns, q=5):
    """Replace each listed column of *df* with its quantile-bin code, in place.

    Args:
        df: Data frame to modify.
        columns: Iterable of column names to discretise.
        q: Number of quantiles requested from ``pd.qcut``; duplicate bin
            edges are dropped, so fewer bins may result.

    Returns:
        None. The listed columns of *df* are overwritten with the integer
        category codes of their bins.
    """
    for name in columns:
        binned = pd.qcut(df[name], q, duplicates='drop')
        df[name] = binned.cat.codes
def normalisasi_data(df, columns):
    """Min-max normalise the given columns of *df* in place.

    Args:
        df: Data frame to modify.
        columns: Column labels to rescale with a default ``MinMaxScaler``.

    Returns:
        None. The selected columns of *df* are overwritten with the scaled
        values.
    """
    scaler = MinMaxScaler()
    # Fit on the frame that was passed in. The previous version read a
    # module-level variable named ``d`` here, which only worked when the
    # caller's frame happened to be that global.
    df[columns] = scaler.fit_transform(df[columns])
# Show every column when a frame is displayed (no truncation).
pd.set_option('display.max_columns', None)

# Load the raw COVID-19 table and preview the first rows, transposed so
# that each column of the file becomes one displayed row.
data = pd.read_csv('covid19.csv')
data.head().transpose()
| 0 | 1 | 2 | 3 | 4 | |
|---|---|---|---|---|---|
| Date | 3/1/2020 | 3/2/2020 | 3/2/2020 | 3/2/2020 | 3/3/2020 |
| Location ISO Code | ID-JK | ID-JK | IDN | ID-RI | ID-JK |
| Location | DKI Jakarta | DKI Jakarta | Indonesia | Riau | DKI Jakarta |
| New Cases | 2 | 2 | 2 | 1 | 2 |
| New Deaths | 0 | 0 | 0 | 0 | 0 |
| New Recovered | 0 | 0 | 0 | 0 | 0 |
| New Active Cases | 2 | 2 | 2 | 1 | 2 |
| Total Cases | 39 | 41 | 2 | 2 | 43 |
| Total Deaths | 20 | 20 | 0 | 0 | 20 |
| Total Recovered | 41 | 41 | 0 | 3 | 41 |
| Total Active Cases | -22 | -20 | 2 | -1 | -18 |
| Location Level | Province | Province | Country | Province | Province |
| City or Regency | NaN | NaN | NaN | NaN | NaN |
| Province | DKI Jakarta | DKI Jakarta | NaN | Riau | DKI Jakarta |
| Country | Indonesia | Indonesia | Indonesia | Indonesia | Indonesia |
| Continent | Asia | Asia | Asia | Asia | Asia |
| Island | Jawa | Jawa | NaN | Sumatera | Jawa |
| Time Zone | UTC+07:00 | UTC+07:00 | NaN | UTC+07:00 | UTC+07:00 |
| Special Status | Daerah Khusus Ibu Kota | Daerah Khusus Ibu Kota | NaN | NaN | Daerah Khusus Ibu Kota |
| Total Regencies | 1 | 1 | 416 | 10 | 1 |
| Total Cities | 5.0 | 5.0 | 98.0 | 2.0 | 5.0 |
| Total Districts | 44 | 44 | 7230 | 169 | 44 |
| Total Urban Villages | 267.0 | 267.0 | 8488.0 | 268.0 | 267.0 |
| Total Rural Villages | NaN | NaN | 74953.0 | 1591.0 | NaN |
| Area (km2) | 664 | 664 | 1916907 | 87024 | 664 |
| Population | 10846145 | 10846145 | 265185520 | 6074100 | 10846145 |
| Population Density | 16334.31 | 16334.31 | 138.34 | 69.8 | 16334.31 |
| Longitude | 106.836118 | 106.836118 | 113.921327 | 101.805109 | 106.836118 |
| Latitude | -6.204699 | -6.204699 | -0.789275 | 0.511648 | -6.204699 |
| New Cases per Million | 0.18 | 0.18 | 0.01 | 0.16 | 0.18 |
| Total Cases per Million | 3.6 | 3.78 | 0.01 | 0.33 | 3.96 |
| New Deaths per Million | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| Total Deaths per Million | 1.84 | 1.84 | 0.0 | 0.0 | 1.84 |
| Total Deaths per 100rb | 0.18 | 0.18 | 0.0 | 0.0 | 0.18 |
| Case Fatality Rate | 51.28% | 48.78% | 0.00% | 0.00% | 46.51% |
| Case Recovered Rate | 105.13% | 100.00% | 0.00% | 150.00% | 95.35% |
| Growth Factor of New Cases | NaN | 1.0 | NaN | NaN | 1.0 |
| Growth Factor of New Deaths | NaN | 1.0 | NaN | NaN | 1.0 |
# Display the (rows, columns) dimensions of the raw table.
data.shape
(21759, 38)
# Count the missing values in every column of the raw table.
data.isnull().sum()
Date 0 Location ISO Code 0 Location 0 New Cases 0 New Deaths 0 New Recovered 0 New Active Cases 0 Total Cases 0 Total Deaths 0 Total Recovered 0 Total Active Cases 0 Location Level 0 City or Regency 21759 Province 642 Country 0 Continent 0 Island 642 Time Zone 642 Special Status 18636 Total Regencies 0 Total Cities 614 Total Districts 0 Total Urban Villages 617 Total Rural Villages 642 Area (km2) 0 Population 0 Population Density 0 Longitude 0 Latitude 0 New Cases per Million 0 Total Cases per Million 0 New Deaths per Million 0 Total Deaths per Million 0 Total Deaths per 100rb 0 Case Fatality Rate 0 Case Recovered Rate 0 Growth Factor of New Cases 1187 Growth Factor of New Deaths 2467 dtype: int64
# Display the dtype pandas assigned to each column.
data.dtypes
Date object Location ISO Code object Location object New Cases int64 New Deaths int64 New Recovered int64 New Active Cases int64 Total Cases int64 Total Deaths int64 Total Recovered int64 Total Active Cases int64 Location Level object City or Regency float64 Province object Country object Continent object Island object Time Zone object Special Status object Total Regencies int64 Total Cities float64 Total Districts int64 Total Urban Villages float64 Total Rural Villages float64 Area (km2) int64 Population int64 Population Density float64 Longitude float64 Latitude float64 New Cases per Million float64 Total Cases per Million float64 New Deaths per Million float64 Total Deaths per Million float64 Total Deaths per 100rb float64 Case Fatality Rate object Case Recovered Rate object Growth Factor of New Cases float64 Growth Factor of New Deaths float64 dtype: object
# Keep only the columns used by the rest of the analysis.
selected_columns = [
    ColumnsData.date,
    ColumnsData.province,
    ColumnsData.island,
    ColumnsData.cases,
    ColumnsData.deaths,
    ColumnsData.recovered,
    ColumnsData.actives_cases,
    ColumnsData.population,
    ColumnsData.area,
    ColumnsData.density,
]
data = data[selected_columns]

# Missing values per remaining column.
data.isna().sum()
Date 0 Province 642 Island 642 Total Cases 0 Total Deaths 0 Total Recovered 0 Total Active Cases 0 Population 0 Area (km2) 0 Population Density 0 dtype: int64
# Discard every row that has at least one missing value (the defaults of
# dropna are axis=0 and how="any"), then confirm nothing is missing.
data = data.dropna()
data.isna().sum()
Date 0 Province 0 Island 0 Total Cases 0 Total Deaths 0 Total Recovered 0 Total Active Cases 0 Population 0 Area (km2) 0 Population Density 0 dtype: int64
# Negative active-case counts are floored at zero.
active_cases = data['Total Active Cases']
data['Total Active Cases'] = active_cases.clip(lower=0)
data.head()
| Date | Province | Island | Total Cases | Total Deaths | Total Recovered | Total Active Cases | Population | Area (km2) | Population Density | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3/1/2020 | DKI Jakarta | Jawa | 39 | 20 | 41 | 0 | 10846145 | 664 | 16334.31 |
| 1 | 3/2/2020 | DKI Jakarta | Jawa | 41 | 20 | 41 | 0 | 10846145 | 664 | 16334.31 |
| 3 | 3/2/2020 | Riau | Sumatera | 2 | 0 | 3 | 0 | 6074100 | 87024 | 69.80 |
| 4 | 3/3/2020 | DKI Jakarta | Jawa | 43 | 20 | 41 | 0 | 10846145 | 664 | 16334.31 |
| 6 | 3/3/2020 | Jawa Barat | Jawa | 1 | 1 | 8 | 0 | 45161325 | 35378 | 1276.55 |
# Convert the textual dates (month/day/year, e.g. "3/1/2020") into
# datetime.date objects.
# - The active-case clip that used to be repeated here is dropped: the same
#   clip already runs earlier in the script and is idempotent.
# - `infer_datetime_format` is no longer passed: it is deprecated in recent
#   pandas, and the parsed result is the same without it.
# - The column is addressed through ColumnsData rather than attribute access,
#   matching the rest of the script.
data[ColumnsData.date] = pd.to_datetime(data[ColumnsData.date]).dt.date
data.head()
| Date | Province | Island | Total Cases | Total Deaths | Total Recovered | Total Active Cases | Population | Area (km2) | Population Density | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-03-01 | DKI Jakarta | Jawa | 39 | 20 | 41 | 0 | 10846145 | 664 | 16334.31 |
| 1 | 2020-03-02 | DKI Jakarta | Jawa | 41 | 20 | 41 | 0 | 10846145 | 664 | 16334.31 |
| 3 | 2020-03-02 | Riau | Sumatera | 2 | 0 | 3 | 0 | 6074100 | 87024 | 69.80 |
| 4 | 2020-03-03 | DKI Jakarta | Jawa | 43 | 20 | 41 | 0 | 10846145 | 664 | 16334.31 |
| 6 | 2020-03-03 | Jawa Barat | Jawa | 1 | 1 | 8 | 0 | 45161325 | 35378 | 1276.55 |
# Mortality = cumulative deaths divided by cumulative cases, row by row.
deaths = data[ColumnsData.deaths]
cases = data[ColumnsData.cases]
data[ColumnsData.mortality] = deaths.div(cases)
data.head().transpose()
| 0 | 1 | 3 | 4 | 6 | |
|---|---|---|---|---|---|
| Date | 2020-03-01 | 2020-03-02 | 2020-03-02 | 2020-03-03 | 2020-03-03 |
| Province | DKI Jakarta | DKI Jakarta | Riau | DKI Jakarta | Jawa Barat |
| Island | Jawa | Jawa | Sumatera | Jawa | Jawa |
| Total Cases | 39 | 41 | 2 | 43 | 1 |
| Total Deaths | 20 | 20 | 0 | 20 | 1 |
| Total Recovered | 41 | 41 | 3 | 41 | 8 |
| Total Active Cases | 0 | 0 | 0 | 0 | 0 |
| Population | 10846145 | 10846145 | 6074100 | 10846145 | 45161325 |
| Area (km2) | 664 | 664 | 87024 | 664 | 35378 |
| Population Density | 16334.31 | 16334.31 | 69.8 | 16334.31 | 1276.55 |
| Mortality | 0.512821 | 0.487805 | 0.0 | 0.465116 | 1.0 |
# National totals per date: sum the cumulative counters of all provinces.
counter_columns = [ColumnsData.cases, ColumnsData.deaths, ColumnsData.recovered]
daily_totals = (
    data[[ColumnsData.date] + counter_columns]
    .groupby(ColumnsData.date)
    .sum()
    .reset_index()
)

# Keep only dates with at least 100 total cases and reshape to long format
# ('variable' / 'value') so each counter becomes one line in the plot.
dfl = daily_totals[daily_totals[ColumnsData.cases] >= 100].melt(
    id_vars=ColumnsData.date,
    value_vars=counter_columns,
)

plot_a = px.line(dfl, x=ColumnsData.date, y='value', color='variable',
                 template="plotly_white")
# The x axis carries dates, so it is labelled with the date column name
# (it was previously titled 'Indonesia').
plot_a.update_layout(title='COVID-19 in Indonesia: total number of cases over time',
                     xaxis_title=ColumnsData.date, yaxis_title='Number of cases',
                     legend=dict(x=0.02, y=0.98))
plot_a.show()
# Silence pandas' chained-assignment warning for the rest of the script.
pd.set_option('mode.chained_assignment', None)

limit = 5

# t: the last row of every province, ordered by total cases (descending)
# and indexed by province name.
group = data.groupby(ColumnsData.province)
last_rows = group.tail(1)
t = (
    last_rows
    .sort_values(ColumnsData.cases, ascending=False)
    .set_index(ColumnsData.province)
    .drop(columns=[ColumnsData.date])
)

# s: full history of the `limit` provinces with the most cases, restricted
# to rows with at least 1000 total cases.
top_provinces = t.index[:limit]
s = data[data[ColumnsData.province].isin(top_provinces)]
s = s[s[ColumnsData.cases] >= 1000]

plot_b = px.line(s, x=ColumnsData.date, y=ColumnsData.cases,
                 color=ColumnsData.province, template="plotly_white")
plot_b.update_layout(
    title='COVID-19 in Indonesia: total number of cases over time',
    xaxis_title=ColumnsData.date,
    yaxis_title='Number of cases',
    legend_title='<b>Top %s provinces</b>' % limit,
    legend=dict(x=0.02, y=0.98),
)
plot_b.show()
# Heatmap of total cases: dates on x, provinces on y, restricted to rows
# with at least 100 total cases.
enough_cases = data[ColumnsData.cases] >= 100
heatmap = data[enough_cases].sort_values([ColumnsData.date, ColumnsData.province])

heatmap_trace = go.Heatmap(
    z=heatmap[ColumnsData.cases],
    x=heatmap[ColumnsData.date],
    y=heatmap[ColumnsData.province],
    colorscale='Plasma',
)
vis_hmap = go.Figure(data=heatmap_trace)
vis_hmap.update_layout(
    title='COVID-19 in Indonesia: number of cases over time',
    xaxis_nticks=20,
)
vis_hmap.show()
# Correlate the numeric per-province columns only. `t` still holds the
# textual Island column, and DataFrame.corr raises on non-numeric data in
# recent pandas versions, so it is filtered out explicitly.
numeric_t = t.select_dtypes(include='number')

# Keep the correlation rows for total cases and total deaths (selected by
# name instead of by position) and transpose, so every candidate feature is
# one row.
corr = numeric_t.corr().loc[[ColumnsData.cases, ColumnsData.deaths]].transpose()

# Features: columns whose correlation with total cases exceeds 0.25,
# strongest first.
corr = corr[corr[ColumnsData.cases] > 0.25].sort_values(ColumnsData.cases, ascending=False)
features = corr.index.tolist()

# Mortality is always part of the feature set; avoid listing it twice when
# it already passed the correlation filter, which would create duplicate
# column labels in `d`.
if ColumnsData.mortality not in features:
    features.append(ColumnsData.mortality)
print('Selected features:', features)

# d: working copy of the selected features, one row per province.
d = t[features].copy()
d.head(10)
Selected features: ['Total Cases', 'Total Recovered', 'Total Deaths', 'Population Density', 'Population', 'Total Active Cases', 'Mortality']
| Total Cases | Total Recovered | Total Deaths | Population Density | Population | Total Active Cases | Mortality | |
|---|---|---|---|---|---|---|---|
| Province | |||||||
| DKI Jakarta | 864045 | 849875 | 13596 | 16334.31 | 10846145 | 574 | 0.015735 |
| Jawa Barat | 707934 | 692101 | 14737 | 1276.55 | 45161325 | 1096 | 0.020817 |
| Jawa Tengah | 486435 | 454837 | 30225 | 1108.64 | 36364072 | 1373 | 0.062136 |
| Jawa Timur | 399478 | 369537 | 29697 | 846.78 | 40479023 | 244 | 0.074340 |
| Kalimantan Timur | 158245 | 152714 | 5449 | 27.52 | 3552191 | 82 | 0.034434 |
| Daerah Istimewa Yogyakarta | 156769 | 150965 | 5263 | 1158.90 | 3631015 | 541 | 0.033572 |
| Banten | 132693 | 129872 | 2688 | 1109.64 | 10722374 | 133 | 0.020257 |
| Riau | 128825 | 124123 | 4109 | 69.80 | 6074100 | 593 | 0.031896 |
| Bali | 114233 | 110003 | 4046 | 729.43 | 4216171 | 184 | 0.035419 |
| Sulawesi Selatan | 109919 | 107630 | 2238 | 201.78 | 9426885 | 51 | 0.020360 |
# Discretise every feature into (up to) 8 quantile bins, then rescale all
# columns of d with normalisasi_data.
columns_to_bin = [
    ColumnsData.cases,
    ColumnsData.recovered,
    ColumnsData.density,
    ColumnsData.actives_cases,
    ColumnsData.deaths,
    ColumnsData.population,
    ColumnsData.mortality,
]
create_bin(d, columns_to_bin, q=8)
normalisasi_data(d, d.columns)
d.head(20).transpose()
| Province | DKI Jakarta | Jawa Barat | Jawa Tengah | Jawa Timur | Kalimantan Timur | Daerah Istimewa Yogyakarta | Banten | Riau | Bali | Sulawesi Selatan | Sumatera Utara | Sumatera Barat | Kalimantan Selatan | Nusa Tenggara Timur | Sumatera Selatan | Kepulauan Riau | Kepulauan Bangka Belitung | Lampung | Sulawesi Tengah | Kalimantan Tengah |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Total Cases | 1.0 | 1.000000 | 1.0 | 1.000000 | 1.000000 | 0.857143 | 0.857143 | 0.857143 | 0.857143 | 0.714286 | 0.714286 | 0.714286 | 0.714286 | 0.571429 | 0.571429 | 0.571429 | 0.571429 | 0.428571 | 0.428571 | 0.428571 |
| Total Recovered | 1.0 | 1.000000 | 1.0 | 1.000000 | 1.000000 | 0.857143 | 0.857143 | 0.857143 | 0.857143 | 0.714286 | 0.714286 | 0.714286 | 0.714286 | 0.571429 | 0.571429 | 0.571429 | 0.571429 | 0.428571 | 0.428571 | 0.428571 |
| Total Deaths | 1.0 | 1.000000 | 1.0 | 1.000000 | 1.000000 | 0.857143 | 0.714286 | 0.857143 | 0.857143 | 0.571429 | 0.714286 | 0.571429 | 0.714286 | 0.428571 | 0.714286 | 0.571429 | 0.428571 | 0.857143 | 0.428571 | 0.428571 |
| Population Density | 1.0 | 1.000000 | 1.0 | 0.857143 | 0.000000 | 1.000000 | 1.000000 | 0.285714 | 0.857143 | 0.714286 | 0.714286 | 0.571429 | 0.571429 | 0.571429 | 0.428571 | 0.714286 | 0.285714 | 0.857143 | 0.142857 | 0.000000 |
| Population | 1.0 | 1.000000 | 1.0 | 1.000000 | 0.428571 | 0.428571 | 0.857143 | 0.714286 | 0.571429 | 0.857143 | 1.000000 | 0.714286 | 0.428571 | 0.714286 | 0.857143 | 0.142857 | 0.000000 | 0.857143 | 0.285714 | 0.285714 |
| Total Active Cases | 1.0 | 1.000000 | 1.0 | 0.857143 | 0.428571 | 0.857143 | 0.714286 | 1.000000 | 0.714286 | 0.428571 | 0.571429 | 0.571429 | 0.285714 | 0.714286 | 0.142857 | 0.000000 | 0.571429 | 0.857143 | 0.285714 | 0.428571 |
| Mortality | 0.0 | 0.142857 | 1.0 | 1.000000 | 0.857143 | 0.714286 | 0.000000 | 0.714286 | 0.857143 | 0.142857 | 0.428571 | 0.285714 | 0.857143 | 0.142857 | 1.000000 | 0.714286 | 0.428571 | 1.000000 | 0.714286 | 0.571429 |
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Feature matrix used for the elbow analysis.
feature_columns = ['Total Cases', 'Total Recovered', 'Population Density',
                   'Total Active Cases', 'Total Deaths', 'Population', 'Mortality']
X = d[feature_columns]

# Elbow method: fit K-means for k = 1..9 and record the inertia
# (within-cluster sum of squares) of each fit.
cluster_counts = range(1, 10)
inertia = [
    KMeans(n_clusters=k, random_state=24).fit(X).inertia_
    for k in cluster_counts
]

fig, ax = plt.subplots(figsize=(18, 16))
ax.plot(cluster_counts, inertia)
ax.set_title('The Elbow Method - Inertia plot', fontsize=20)
ax.set_xlabel('No. of Clusters')
ax.set_ylabel('WCSS')
plt.show()
C:\Users\raditya\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:881: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
# Feature matrix as a plain NumPy array (one row per province).
feature_columns = ['Total Cases', 'Total Recovered', 'Population Density',
                   'Total Active Cases', 'Total Deaths', 'Population', 'Mortality']
X = d[feature_columns].to_numpy()

kmeans = KMeans(
    n_clusters=7,        # number of clusters K
    init='k-means++',    # centroid initialisation method
    random_state=111,
)
# Fit the model and get the cluster id assigned to every row of X.
y_kmeans = kmeans.fit_predict(X)
y_kmeans
array([0, 0, 2, 2, 4, 2, 0, 4, 2, 6, 6, 6, 3, 6, 3, 3, 3, 2, 3, 3, 6, 3,
1, 1, 5, 1, 1, 5, 1, 1, 1, 1, 1, 1])
# Scatter the rows of X in the plane of its first two columns, one colour
# per cluster, with the fitted centroids on top. X is built from the
# column list ['Total Cases', 'Total Recovered', ...], so column 0 is total
# cases and column 1 is total recovered (both binned and normalised).
cluster_colours = ['red', 'blue', 'green', 'cyan', 'black', 'orange', 'yellow']

plt.figure(1, figsize=(12, 5))
for cluster_id, colour in enumerate(cluster_colours):
    members = X[y_kmeans == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=50, c=colour,
                label='Cluster %d' % (cluster_id + 1))

centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], s=200, c='purple', label='Centroids')

# Title and axis labels describe what is actually plotted; the previous
# texts ('Clusters of customers', 'Province', 'Deaths per case') did not
# match the plotted columns.
plt.title('K-means clusters of provinces')
plt.xlabel('Total Cases (binned, normalised)')
plt.ylabel('Total Recovered (binned, normalised)')
plt.legend()
plt.show()
# Feature matrix as a plain NumPy array (one row per province).
feature_columns = ['Total Cases', 'Total Recovered', 'Population Density',
                   'Total Active Cases', 'Total Deaths', 'Population', 'Mortality']
X = d[feature_columns].to_numpy()

kmeans = KMeans(
    n_clusters=6,        # number of clusters K
    init='k-means++',    # centroid initialisation method
    random_state=111,
)
# Fit the model and get the cluster id assigned to every row of X.
y_kmeans = kmeans.fit_predict(X)
y_kmeans
array([3, 3, 1, 1, 2, 1, 3, 1, 1, 5, 5, 5, 2, 5, 2, 2, 4, 1, 4, 4, 4, 2,
0, 4, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0])
# Scatter the rows of X in the plane of its first two columns, one colour
# per cluster, with the fitted centroids on top. X is built from the
# column list ['Total Cases', 'Total Recovered', ...], so column 0 is total
# cases and column 1 is total recovered (both binned and normalised).
cluster_colours = ['red', 'blue', 'green', 'cyan', 'black', 'pink']

plt.figure(1, figsize=(12, 5))
for cluster_id, colour in enumerate(cluster_colours):
    members = X[y_kmeans == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=50, c=colour,
                label='Cluster %d' % (cluster_id + 1))

centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], s=200, c='purple', label='Centroids')

# Title and axis labels describe what is actually plotted; the previous
# texts ('Clusters of customers', 'Province', 'Deaths per case') did not
# match the plotted columns.
plt.title('K-means clusters of provinces')
plt.xlabel('Total Cases (binned, normalised)')
plt.ylabel('Total Recovered (binned, normalised)')
plt.legend()
plt.show()
# Final clustering: 6 clusters over all feature columns of d. The cluster id
# of every province is stored both in d (normalised features) and in t
# (original per-province values), which share the same row order.
cluster_label = 'K-means Cluster Results'

# Exclude a label column left behind by a previous run of this cell so the
# old cluster ids are never fed back into the model as a feature.
feature_frame = d.drop(columns=[cluster_label], errors='ignore')

# A fixed seed (the same one used by the other 6-cluster fit in this script)
# keeps the cluster ids, and every chart derived from them, reproducible.
kmeans = KMeans(n_clusters=6, random_state=111)
pred = kmeans.fit_predict(feature_frame)
t[cluster_label] = pred
d[cluster_label] = pred

d.sort_values([cluster_label, ColumnsData.mortality,
               ColumnsData.cases, ColumnsData.actives_cases,
               ColumnsData.density], ascending=True)
| Total Cases | Total Recovered | Total Deaths | Population Density | Population | Total Active Cases | Mortality | K-means Cluster Results | |
|---|---|---|---|---|---|---|---|---|
| Province | ||||||||
| Daerah Istimewa Yogyakarta | 0.857143 | 0.857143 | 0.857143 | 1.000000 | 0.428571 | 0.857143 | 0.714286 | 0 |
| Riau | 0.857143 | 0.857143 | 0.857143 | 0.285714 | 0.714286 | 1.000000 | 0.714286 | 0 |
| Bali | 0.857143 | 0.857143 | 0.857143 | 0.857143 | 0.571429 | 0.714286 | 0.857143 | 0 |
| Lampung | 0.428571 | 0.428571 | 0.857143 | 0.857143 | 0.857143 | 0.857143 | 1.000000 | 0 |
| Jawa Timur | 1.000000 | 1.000000 | 1.000000 | 0.857143 | 1.000000 | 0.857143 | 1.000000 | 0 |
| Jawa Tengah | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0 |
| Sulawesi Selatan | 0.714286 | 0.714286 | 0.571429 | 0.714286 | 0.857143 | 0.428571 | 0.142857 | 1 |
| Sumatera Barat | 0.714286 | 0.714286 | 0.571429 | 0.571429 | 0.714286 | 0.571429 | 0.285714 | 1 |
| Sumatera Utara | 0.714286 | 0.714286 | 0.714286 | 0.714286 | 1.000000 | 0.571429 | 0.428571 | 1 |
| Maluku | 0.000000 | 0.000000 | 0.000000 | 0.142857 | 0.142857 | 0.000000 | 0.000000 | 2 |
| Papua Barat | 0.142857 | 0.142857 | 0.000000 | 0.000000 | 0.000000 | 0.857143 | 0.000000 | 2 |
| Bengkulu | 0.142857 | 0.142857 | 0.142857 | 0.428571 | 0.142857 | 0.142857 | 0.142857 | 2 |
| Maluku Utara | 0.000000 | 0.000000 | 0.000000 | 0.142857 | 0.000000 | 0.285714 | 0.285714 | 2 |
| Kalimantan Utara | 0.285714 | 0.285714 | 0.285714 | 0.000000 | 0.000000 | 0.142857 | 0.285714 | 2 |
| Sulawesi Tenggara | 0.000000 | 0.000000 | 0.142857 | 0.285714 | 0.285714 | 0.428571 | 0.428571 | 2 |
| Jambi | 0.142857 | 0.142857 | 0.142857 | 0.285714 | 0.428571 | 0.000000 | 0.428571 | 2 |
| Sulawesi Barat | 0.000000 | 0.000000 | 0.000000 | 0.428571 | 0.142857 | 0.000000 | 0.571429 | 2 |
| Gorontalo | 0.000000 | 0.000000 | 0.000000 | 0.571429 | 0.000000 | 0.000000 | 0.857143 | 2 |
| Papua | 0.285714 | 0.285714 | 0.142857 | 0.000000 | 0.571429 | 1.000000 | 0.000000 | 3 |
| Nusa Tenggara Timur | 0.571429 | 0.571429 | 0.428571 | 0.571429 | 0.714286 | 0.714286 | 0.142857 | 3 |
| Kalimantan Barat | 0.428571 | 0.428571 | 0.285714 | 0.142857 | 0.714286 | 0.285714 | 0.285714 | 3 |
| Kepulauan Bangka Belitung | 0.571429 | 0.571429 | 0.428571 | 0.285714 | 0.000000 | 0.571429 | 0.428571 | 3 |
| Nusa Tenggara Barat | 0.142857 | 0.142857 | 0.285714 | 0.857143 | 0.571429 | 0.571429 | 0.571429 | 3 |
| Sulawesi Utara | 0.285714 | 0.285714 | 0.285714 | 0.714286 | 0.285714 | 0.571429 | 0.571429 | 3 |
| Kalimantan Tengah | 0.428571 | 0.428571 | 0.428571 | 0.000000 | 0.285714 | 0.428571 | 0.571429 | 3 |
| Sulawesi Tengah | 0.428571 | 0.428571 | 0.428571 | 0.142857 | 0.285714 | 0.285714 | 0.714286 | 3 |
| Kepulauan Riau | 0.571429 | 0.571429 | 0.571429 | 0.714286 | 0.142857 | 0.000000 | 0.714286 | 4 |
| Kalimantan Selatan | 0.714286 | 0.714286 | 0.714286 | 0.571429 | 0.428571 | 0.285714 | 0.857143 | 4 |
| Kalimantan Timur | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 0.428571 | 0.428571 | 0.857143 | 4 |
| Aceh | 0.285714 | 0.285714 | 0.571429 | 0.428571 | 0.571429 | 0.142857 | 1.000000 | 4 |
| Sumatera Selatan | 0.571429 | 0.571429 | 0.714286 | 0.428571 | 0.857143 | 0.142857 | 1.000000 | 4 |
| Banten | 0.857143 | 0.857143 | 0.714286 | 1.000000 | 0.857143 | 0.714286 | 0.000000 | 5 |
| DKI Jakarta | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 5 |
| Jawa Barat | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.142857 | 5 |
# Treemaps of the provinces nested under their cluster id: the first is
# sized by total cases, the second by mortality.
cluster_frame = t.reset_index()
tree_path = ['K-means Cluster Results', ColumnsData.province]

vis_tmap = px.treemap(cluster_frame, path=tree_path, values=ColumnsData.cases)
vis_tmap.update_layout(title='K-means clusters untuk kasus di setiap provinsi')
vis_tmap.show()

vis_tmap = px.treemap(cluster_frame, path=tree_path, values=ColumnsData.mortality)
vis_tmap.update_layout(title='K-means clusters untuk rata rata kematian di setiap provinsi')
vis_tmap.show()
# Bar chart of total cases per province, one trace (colour) per cluster.
cluster_label = 'K-means Cluster Results'
c = t.sort_values([cluster_label, ColumnsData.cases], ascending=False)

# Build one trace per cluster id that actually occurs (previously a fixed
# range(0, 10) produced empty traces for ids that do not exist). The traces
# are kept in their own list so the `data` frame is not overwritten.
case_traces = []
for cluster_id in sorted(c[cluster_label].unique()):
    members = c[c[cluster_label] == cluster_id]
    case_traces.append(
        go.Bar(
            x=members.index,
            y=members[ColumnsData.cases],
            text=members[ColumnsData.cases],
            name=str(cluster_id),
        )
    )

vis_bar = go.Figure(data=case_traces)
# The y axis shows total cases (it was previously titled 'Deaths per case').
vis_bar.update_layout(title='K-means Clustering: kasus di setiap provinsi',
                      xaxis_title='Indonesia State', yaxis_title='Total cases')
vis_bar.show()
# Bar chart of the mortality rate per province, one trace (colour) per
# cluster, plus a constant reference line.
cluster_label = 'K-means Cluster Results'
# Hard-coded level drawn as the 'Indonesian avg' line.
reference_mortality = 0.03

c = t.sort_values([cluster_label, ColumnsData.mortality], ascending=False)

# Build one trace per cluster id that actually occurs (previously a fixed
# range(0, 10) produced empty traces for ids that do not exist). The traces
# are kept in their own list so the `data` frame is not overwritten.
mortality_traces = []
for cluster_id in sorted(c[cluster_label].unique()):
    members = c[c[cluster_label] == cluster_id]
    mortality_traces.append(
        go.Bar(
            x=members.index,
            y=members[ColumnsData.mortality],
            text=members[ColumnsData.mortality],
            name=str(cluster_id),
        )
    )

# Constant reference line spanning all provinces.
provinces_by_mortality = t.sort_values(ColumnsData.mortality, ascending=False).index
mortality_traces.append(
    go.Scatter(
        x=provinces_by_mortality,
        y=[reference_mortality] * len(provinces_by_mortality),
        marker_color='black',
        name='Indonesian avg',
    )
)

vis_bar2 = go.Figure(data=mortality_traces)
vis_bar2.update_layout(title='K-means Clustering: rata rata kematian di setiap provinsi',
                       xaxis_title='Indonesian states', yaxis_title='Deaths per case')
vis_bar2.show()